{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 隐语义模型的梯度下降求解"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.算法实现"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "\"\"\"\n",
    "@输入参数：\n",
    "R：M*N 的评分矩阵\n",
    "P：初始化用户特征矩阵M*K\n",
    "Q：初始化物品特征矩阵N*K\n",
    "K：隐特征向量个数\n",
    "steps: 最大迭代次数\n",
    "alpha：步长\n",
    "lamda：正则化系数\n",
    "\n",
    "@输出：\n",
    "分解之后的 P，Q\n",
    "\"\"\"\n",
    "\n",
    "def LFM_grad_desc(R, K=5, steps=3000, alpha=0.0002, lamda=0.004):\n",
    "    M = len(R)\n",
    "    N = len(R[0])\n",
    "    P = np.random.rand(M,K)\n",
    "    Q = np.random.rand(N,K)\n",
    "    Q = Q.T\n",
    "    \n",
    "    for step in range(steps):\n",
    "        for i in range(M):\n",
    "            for j in range(N):\n",
    "                # 如果评分大于0，表示有评分，才考虑误差\n",
    "                if R[i][j] > 0:\n",
    "                    eij = R[i][j] - np.dot(P[i,:],Q[:,j])\n",
    "                    for k in range(K):\n",
    "                        P[i][k] = P[i][k] + alpha * (2 * eij * Q[k][j] - 2 * lamda * P[i][k])\n",
    "                        Q[k][j] = Q[k][j] + alpha * (2 * eij * P[i][k] - 2 * lamda * Q[k][j])\n",
    "\n",
    "        # 根据更新之后的P、Q计算预测评分矩阵\n",
    "        eR = np.dot(P,Q)\n",
    "        # 计算当前损失函数\n",
    "        e = 0\n",
    "        for i in range(M):\n",
    "            for j in range(N):\n",
    "                if R[i][j] > 0:\n",
    "                    e += (R[i][j]-np.dot(P[i,:],Q[:,j]))**2\n",
    "                    for k in range(K):\n",
    "                        e += lamda * (P[i][k]**2 + Q[k][j]**2)\n",
    "        \n",
    "        if e < 0.001:\n",
    "            break\n",
    "    return P, Q.T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. 测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1, 0, 1, 0, 0],\n",
       "       [0, 1, 0, 0, 1],\n",
       "       [1, 0, 0, 1, 0],\n",
       "       [0, 0, 1, 0, 0]])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "R = np.array([[1,0,1,0,0],\n",
    "              [0,1,0,0,1],\n",
    "              [1,0,0,1,0],\n",
    "              [0,0,1,0,0]])\n",
    "\n",
    "nP,nQ = LFM_grad_desc(R)\n",
    "R"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.1552958 ,  0.4671987 ,  0.57300613,  0.60611411,  0.33465085],\n",
       "       [ 0.24276829,  0.49862254,  0.82293843,  0.13913211,  0.3202733 ],\n",
       "       [-0.04162319,  0.49651099, -0.0013388 ,  0.60885689,  0.6311348 ],\n",
       "       [ 0.09864814,  0.16555974,  0.85228676,  0.79004128,  0.52623308]])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.21608466, 0.10855826, 0.32945517, 0.81660581, 0.73945063],\n",
       "       [0.11321961, 0.22313506, 0.93589454, 0.41266649, 0.11920088],\n",
       "       [0.17955418, 0.61763027, 0.35939077, 0.29880653, 0.71711584],\n",
       "       [0.84933477, 0.85311698, 0.37122398, 0.27773627, 0.68709738],\n",
       "       [0.74862849, 0.90911455, 0.1804738 , 0.35901584, 0.48934658]])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nQ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.01546924, 0.94811791, 0.94346746, 1.14146447, 1.02577339],\n",
       "       [0.7281518 , 1.00452191, 0.91855755, 1.19577036, 0.99024236],\n",
       "       [1.00835427, 0.43131013, 0.93323262, 0.99048565, 0.94741636],\n",
       "       [1.3543552 , 1.23451262, 1.03971096, 1.12241289, 0.91932669]])"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nP.dot(nQ.T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
